library(readr)
# Load the raw survey export from the working directory.
survey_data <- read_csv("Survey_Data.csv")

# Split respondents on the FL_8_DO column: rows equal to "FL_9" form the
# manipulation group and rows equal to "FL_12" form the control group.
# subset() silently drops rows where the comparison evaluates to NA.
manipulation <- subset(survey_data, FL_8_DO == "FL_9")
control <- subset(survey_data, FL_8_DO == "FL_12")

# Outcome columns for the manipulation group:
#   electioncare    - copy of Q18
#   politicinterest - copy of Q19
#   vote            - Q20 flipped as 8 - Q20
#                     (presumably a 1-7 item being reverse-coded; confirm)
manipulation$electioncare <- manipulation$Q18
manipulation$politicinterest <- manipulation$Q19
manipulation$vote <- 8 - manipulation$Q20

# The same three outcome columns for the control group.
control$electioncare <- control$Q18
control$politicinterest <- control$Q19
control$vote <- 8 - control$Q20

# Two-sided, unequal-variance (Welch) two-sample t-tests comparing the
# manipulation and control groups on each outcome. The figures in the
# comments were recorded by hand from an earlier run of this script.

# electioncare
#   recorded p-value:           0.09642
#   recorded manipulation mean: 2.082237
#   recorded control mean:      1.990000
t.test(
  manipulation$electioncare,
  control$electioncare,
  alternative = "two.sided",
  var.equal = FALSE
)

# politicinterest
#   recorded p-value:           0.1062
#   recorded manipulation mean: 2.606908
#   recorded control mean:      2.506667
t.test(
  manipulation$politicinterest,
  control$politicinterest,
  alternative = "two.sided",
  var.equal = FALSE
)

# vote
#   recorded p-value:           0.01541
#   recorded manipulation mean: 6.583882
#   recorded control mean:      6.431667
t.test(
  manipulation$vote,
  control$vote,
  alternative = "two.sided",
  var.equal = FALSE
)

# One-sample t-tests per group and outcome. With default arguments each call
# prints the group's mean together with its 95% confidence interval.
t.test(manipulation$electioncare)
t.test(control$electioncare)

t.test(manipulation$politicinterest)
t.test(control$politicinterest)

t.test(manipulation$vote)
t.test(control$vote)

# Summary tables that feed the bar plots. The means and error-bar widths are
# computed from the data rather than typed in by hand, so they cannot drift
# out of sync with Survey_Data.csv. The previously hard-coded Error values
# are consistent with 95% confidence-interval half-widths from the one-sample
# t.test() calls earlier in this script, so that is what is computed here.

# Return c(mean, error) for one numeric vector, where error is half the width
# of the 95% confidence interval reported by a one-sample t.test().
# t.test() drops NA values before computing.
mean_and_ci <- function(x) {
  fit <- t.test(x)
  ci <- as.numeric(fit$conf.int)
  c(mean = unname(fit$estimate), error = (ci[2] - ci[1]) / 2)
}

# Build the two-row table (Manipulation first, then Control) for the outcome
# column named `outcome`, with columns Category, Mean and Error.
group_summary <- function(outcome) {
  manip_stats <- mean_and_ci(manipulation[[outcome]])
  control_stats <- mean_and_ci(control[[outcome]])
  data.frame(
    Category = c("Manipulation", "Control"),
    Mean = unname(c(manip_stats["mean"], control_stats["mean"])),
    Error = unname(c(manip_stats["error"], control_stats["error"]))
  )
}

careelections <- group_summary("electioncare")

interestpolitics <- group_summary("politicinterest")

willvote <- group_summary("vote")

library(ggplot2)

# Draw one bar chart of group means with error bars.
#   summary_df: data frame with Category, Mean and Error columns
#   title:      text passed to ggtitle()
# Bars span Mean and the error bars span Mean - Error to Mean + Error.
# The two bar colours are supplied directly to geom_bar() as constants.
plot_group_means <- function(summary_df, title) {
  ggplot(data = summary_df, aes(x = Category, y = Mean, fill = Category)) +
    geom_bar(stat = "identity", width = 0.5, fill = c("#C8102E", "#F1BE48")) +
    geom_errorbar(aes(ymin = Mean - Error, ymax = Mean + Error), width = 0.3) +
    theme_classic() +
    theme(text = element_text(size = 20)) +
    ggtitle(title)
}

# How much respondents care about election outcomes.
plotelections <- plot_group_means(
  careelections,
  "Mean Care About Election Outcomes"
)
plotelections

# Interest in politics.
plotinterest <- plot_group_means(
  interestpolitics,
  "Mean Interest in Politics"
)
plotinterest

# Likelihood of voting.
plotvote <- plot_group_means(
  willvote,
  "Mean Likelihood of Voting"
)
plotvote

# Group-balance checks: two-sided, unequal-variance two-sample t-tests of
# manipulation vs control on each background variable. The p-values in the
# comments were recorded by hand from an earlier run.
# NOTE(review): several of these columns look categorical (e.g. Race,
# Gender); if so, a test for categorical data may be more appropriate than a
# t-test on the numeric codes - confirm how the columns are coded.

# Age is converted with as.numeric() before testing.
# recorded p-value: 0.9065
t.test(
  as.numeric(manipulation$Age),
  as.numeric(control$Age),
  alternative = "two.sided",
  var.equal = FALSE
)

# recorded p-value: 0.158
t.test(
  manipulation$Gender,
  control$Gender,
  alternative = "two.sided",
  var.equal = FALSE
)

# recorded p-value: 0.3316
t.test(
  manipulation$Hispanic,
  control$Hispanic,
  alternative = "two.sided",
  var.equal = FALSE
)

# recorded p-value: 0.3169
t.test(
  manipulation$Race,
  control$Race,
  alternative = "two.sided",
  var.equal = FALSE
)

# recorded p-value: 0.4884
t.test(
  manipulation$`Dem/GOP`,
  control$`Dem/GOP`,
  alternative = "two.sided",
  var.equal = FALSE
)

# recorded p-value: 0.7694
t.test(
  manipulation$`S/W Dem`,
  control$`S/W Dem`,
  alternative = "two.sided",
  var.equal = FALSE
)

# recorded p-value: 0.4253
t.test(
  manipulation$`S/W GOP`,
  control$`S/W GOP`,
  alternative = "two.sided",
  var.equal = FALSE
)

# recorded p-value: 0.2663
t.test(
  manipulation$`Politic Info`,
  control$`Politic Info`,
  alternative = "two.sided",
  var.equal = FALSE
)

# recorded p-value: 0.7994
t.test(
  manipulation$`Church Y/N`,
  control$`Church Y/N`,
  alternative = "two.sided",
  var.equal = FALSE
)

# recorded p-value: 0.6763
t.test(
  manipulation$`Church attendance`,
  control$`Church attendance`,
  alternative = "two.sided",
  var.equal = FALSE
)

# recorded p-value: 0.3712
t.test(
  manipulation$Urban,
  control$Urban,
  alternative = "two.sided",
  var.equal = FALSE
)

# Free-text responses (column Q29) from the manipulation group only.
electionresponses <- manipulation$Q29

library(SnowballC)
library(syuzhet)
library(tm)
library(wordcloud)

# Build a tm corpus with one document per response, then clean it step by
# step. The order of the steps is kept exactly as originally written.
TextDoc <- Corpus(VectorSource(electionresponses))
TextDoc <- tm_map(TextDoc, content_transformer(tolower)) # lower-case
TextDoc <- tm_map(TextDoc, removeNumbers)                # drop digits
TextDoc <- tm_map(TextDoc, removeWords, stopwords("english"))
TextDoc <- tm_map(TextDoc, removeWords, c("trump"))      # drop this one word
TextDoc <- tm_map(TextDoc, removePunctuation)
TextDoc <- tm_map(TextDoc, stripWhitespace)
TextDoc <- tm_map(TextDoc, stemDocument)                 # reduce to stems

# Term-by-document counts, collapsed to one total per term and sorted from
# most to least frequent.
TextDoc_dtm <- TermDocumentMatrix(TextDoc)
dtm_m <- as.matrix(TextDoc_dtm)
dtm_v <- sort(rowSums(dtm_m), decreasing = TRUE)
dtm_d <- data.frame(word = names(dtm_v), freq = dtm_v)
head(dtm_d, 5)

# Word cloud of terms occurring at least 5 times (at most 100 words); the
# seed is fixed so the layout is reproducible.
set.seed(1234)
wordcloud(
  words = dtm_d$word,
  freq = dtm_d$freq,
  min.freq = 5,
  max.words = 100,
  random.order = FALSE,
  rot.per = 0.40,
  colors = brewer.pal(8, "Set2")
)

# Score every free-text response with three sentiment lexicons, printing the
# first few scores and a numeric summary for each.

# "syuzhet" lexicon
syuzhet_vector <- get_sentiment(electionresponses, method = "syuzhet")
head(syuzhet_vector)
summary(syuzhet_vector)

# "bing" lexicon
bing_vector <- get_sentiment(electionresponses, method = "bing")
head(bing_vector)
summary(bing_vector)

# "afinn" lexicon
afinn_vector <- get_sentiment(electionresponses, method = "afinn")
head(afinn_vector)
summary(afinn_vector)

# Attach the scores to the manipulation group so they can be used as
# predictors; the vectors line up row-for-row because electionresponses was
# taken from manipulation$Q29.
manipulation$syuzhet <- syuzhet_vector
manipulation$bing <- bing_vector
manipulation$afinn <- afinn_vector

# Simple linear regressions of each outcome on the ABSOLUTE sentiment score
# (sentiment magnitude regardless of sign), manipulation group only.
# Coefficients and r^2 values in the comments were recorded by hand.
# Note: the same model names are assigned again further down in this script
# for the signed-score models, which replaces the objects created here.

# --- syuzhet lexicon ---
# recorded: coefficient -0.138, r^2 0.013
modelselectioncare <- lm(
  formula = electioncare ~ abs(syuzhet),
  data = manipulation
)
summary(modelselectioncare)

# recorded: coefficient -0.131, r^2 0.0095
modelspoliticinterest <- lm(
  formula = politicinterest ~ abs(syuzhet),
  data = manipulation
)
summary(modelspoliticinterest)

# recorded: coefficient -0.073, r^2 0.0035
modelsvote <- lm(
  formula = vote ~ abs(syuzhet),
  data = manipulation
)
summary(modelsvote)

# --- bing lexicon ---
# recorded: coefficient -0.127, r^2 0.023
modelbelectioncare <- lm(
  formula = electioncare ~ abs(bing),
  data = manipulation
)
summary(modelbelectioncare)

# recorded: coefficient -0.088, r^2 0.0087
modelbpoliticinterest <- lm(
  formula = politicinterest ~ abs(bing),
  data = manipulation
)
summary(modelbpoliticinterest)

# recorded: coefficient -0.11, r^2 0.016
modelbvote <- lm(
  formula = vote ~ abs(bing),
  data = manipulation
)
summary(modelbvote)

# --- afinn lexicon ---
# recorded: coefficient -0.059, r^2 0.023
modelaelectioncare <- lm(
  formula = electioncare ~ abs(afinn),
  data = manipulation
)
summary(modelaelectioncare)

# recorded: coefficient -0.048, r^2 0.012
modelapoliticinterest <- lm(
  formula = politicinterest ~ abs(afinn),
  data = manipulation
)
summary(modelapoliticinterest)

# recorded: coefficient -0.041, r^2 0.011
modelavote <- lm(
  formula = vote ~ abs(afinn),
  data = manipulation
)
summary(modelavote)


## Not absolute value: the same regressions using the SIGNED sentiment score.
# These assignments reuse the model names from the abs() section earlier in
# this script, so the earlier model objects are overwritten.
# NOTE(review): every coefficient / r^2 figure recorded below is identical to
# the one recorded for the corresponding abs() model earlier in this script,
# so these comments were probably copied over rather than re-measured.
# Re-run the summaries and update the figures before relying on them.

# --- syuzhet lexicon ---
# recorded (unverified, see note): coefficient -0.138, r^2 0.013
modelselectioncare <- lm(
  formula = electioncare ~ (syuzhet),
  data = manipulation
)
summary(modelselectioncare)

# recorded (unverified, see note): coefficient -0.131, r^2 0.0095
modelspoliticinterest <- lm(
  formula = politicinterest ~ (syuzhet),
  data = manipulation
)
summary(modelspoliticinterest)

# recorded (unverified, see note): coefficient -0.073, r^2 0.0035
modelsvote <- lm(
  formula = vote ~ (syuzhet),
  data = manipulation
)
summary(modelsvote)

# --- bing lexicon ---
# recorded (unverified, see note): coefficient -0.127, r^2 0.023
modelbelectioncare <- lm(
  formula = electioncare ~ (bing),
  data = manipulation
)
summary(modelbelectioncare)

# recorded (unverified, see note): coefficient -0.088, r^2 0.0087
modelbpoliticinterest <- lm(
  formula = politicinterest ~ (bing),
  data = manipulation
)
summary(modelbpoliticinterest)

# recorded (unverified, see note): coefficient -0.11, r^2 0.016
modelbvote <- lm(
  formula = vote ~ (bing),
  data = manipulation
)
summary(modelbvote)

# --- afinn lexicon ---
# recorded (unverified, see note): coefficient -0.059, r^2 0.023
modelaelectioncare <- lm(
  formula = electioncare ~ (afinn),
  data = manipulation
)
summary(modelaelectioncare)

# recorded (unverified, see note): coefficient -0.048, r^2 0.012
modelapoliticinterest <- lm(
  formula = politicinterest ~ (afinn),
  data = manipulation
)
summary(modelapoliticinterest)

# recorded (unverified, see note): coefficient -0.041, r^2 0.011
modelavote <- lm(
  formula = vote ~ (afinn),
  data = manipulation
)
summary(modelavote)

## MODEL IN THE REVISED PAPER:
# Regress vote on absolute afinn sentiment plus the background covariates
# listed in the formula, using the manipulation group only.
modelavote2 <- lm(
  formula = vote ~ abs(afinn) + Gender + Hispanic + Race +
    `Dem/GOP` + `Church Y/N` + Urban,
  data = manipulation
)
summary(modelavote2)
